# mTurk maker 
################################################################################
# Dependencies
################################################################################
library(data.table)
library(purrr)
library(lubridate)
library(readr)
library(dplyr)
library(readr)
library(readxl)
library(irr)
library(tidyr)
################################################################################
# Setup
################################################################################
# Start from an empty global environment (attached packages are not affected).
rm(list = ls())

# - set dir
# Work out where this script lives so the relative paths used below resolve:
#   * under Rscript the path arrives as a `--file=<path>` command-line argument;
#   * otherwise fall back to the file currently open in the RStudio editor.
args <- commandArgs()

scriptName <- args[startsWith(args, "--file=")]

if (length(scriptName) == 0) {
  scriptName <- rstudioapi::getSourceEditorContext()$path
} else {
  # Drop the leading "--file=" and keep only the first match.
  scriptName <- sub("^--file=", "", scriptName[1])
}

# Fail early with a clear message instead of passing NA or "" to setwd()
# (e.g. when the script is an unsaved buffer in RStudio).
if (length(scriptName) != 1 || is.na(scriptName) || !nzchar(scriptName)) {
  stop("Could not determine the path of this script; cannot set the ",
       "working directory.", call. = FALSE)
}

# dirname() also handles a bare file name (`Rscript script.R` -> "."), which
# the previous strsplit()/substr() arithmetic turned into NA. The trailing
# separator is kept so `pathName` has the same shape as before.
pathName <- file.path(dirname(scriptName), "")

setwd(pathName)

# The whole file is read as one string and later used as a regex pattern.
# NOTE(review): read_file() keeps any trailing newline, which would then be
# part of the pattern - confirm the file does not end with one.
emo_vec <- read_file("./emoji_regex_vector.txt")

# Replace every match of a regular expression in a character vector.
#
# string     character vector to clean
# replacment text substituted for each match (spelling kept for callers)
# emo_vec    regular expression handed to stringr as the pattern
#
# Returns the result of stringr::str_replace_all() for these arguments.
emojy_replace <- function(string, replacment, emo_vec) {
  stringr::str_replace_all(
    string = string,
    pattern = emo_vec,
    replacement = replacment
  )
}
################################################################################
# Relevance
################################################################################
# Read the double-coded relevance annotations, keep only tweets that both
# research assistants labelled, and flag whether their labels coincide.
org <- read_xlsx("../data/research_assistants_data/relevance_task/annotation_data_tweets23_ra_completed.xlsx") %>%
  filter(!is.na(relevant_paula), !is.na(relevant_fabio)) %>%
  mutate(relevance_agreement_check = relevant_paula == relevant_fabio)

# RA agreement: number and share of tweets per agreement status.
org %>%
  count(relevance_agreement_check) %>%
  mutate(f = n / sum(n))

# Consensus label: the shared value where the RAs agree, NA otherwise.
org <- mutate(
  org,
  relevant_ra = ifelse(relevance_agreement_check, relevant_fabio, NA)
)

# Version with the original tweet text.
write_csv(
  org,
  "../data/research_assistants_data/relevance_task/training_data_tweets23_relevance.csv"
)

# mTurk version: same rows, with emoji_regex matches removed from the text.
org <- mutate(org, text = emojy_replace(text, "", emo_vec = emo_vec))
write_csv(org, "../data/mturk_data/training_data_tweets23_relevance.csv")
################################################################################
# Problem Solution
################################################################################
# Read the double-coded problem/solution annotations and keep only tweets for
# which both research assistants gave a relevance label and those labels match.
org <- read_xlsx("../data/research_assistants_data/problem_solution_task/annotation_data_tweets23_ra_completed.xlsx") %>%
  filter(!is.na(relevant_paula), !is.na(relevant_fabio)) %>%
  mutate(relevance_agreement_check = relevant_paula == relevant_fabio) %>%
  filter(relevance_agreement_check)

# Per-frame agreement and the joint flag. The joint flag is NA when one
# comparison is NA and the other is not FALSE; those rows are dropped.
org <- org %>%
  mutate(
    problem_agreement_check = problem_frame_paula == problem_frame_fabio,
    solution_agreement_check = solution_frame_paula == solution_frame_fabio,
    problem_solution_agreement_check =
      solution_agreement_check & problem_agreement_check
  ) %>%
  filter(!is.na(problem_solution_agreement_check))

# RA agreement: number and share of tweets per joint agreement status.
org %>%
  count(problem_solution_agreement_check) %>%
  mutate(f = n / sum(n))

# Consensus frame label. Rows where the RAs disagree on either frame get NA.
# Clauses are evaluated in order, so "Problem"/"Solution" are only reached
# when the tweet is neither "Both" nor "Neither".
# The "Solution" clause now compares against 1 explicitly, like the "Problem"
# clause, instead of relying on numeric-to-logical coercion.
org <- org %>%
  mutate(
    problem_solution_ra = case_when(
      !problem_solution_agreement_check ~ NA_character_,
      (problem_frame_paula + problem_frame_fabio +
         solution_frame_paula + solution_frame_fabio) == 4 ~ "Both",
      (problem_frame_paula + problem_frame_fabio +
         solution_frame_paula + solution_frame_fabio) == 0 ~ "Neither",
      problem_frame_fabio == 1 ~ "Problem",
      solution_frame_paula == 1 ~ "Solution",
      TRUE ~ NA_character_
    )
  ) %>%
  select(-c(problem_agreement_check, solution_agreement_check)) %>%
  distinct(status_id, .keep_all = TRUE)

# Version with the original tweet text.
write_csv(
  org,
  "../data/research_assistants_data/problem_solution_task/training_data_tweets23_problem_solution.csv"
)

# mTurk version: emoji_regex matches removed from the text, restricted to
# tweets coded relevant (relevant_paula == 1; the RAs already agree on
# relevance at this point) and not labelled "Both".
# NOTE(review): `%in%` never yields NA, so rows whose label is NA (frame
# disagreement) are kept here - confirm that this is intended.
org <- org %>%
  mutate(text = emojy_replace(text, "", emo_vec = emo_vec)) %>%
  filter(relevant_paula == 1) %>%
  filter(!problem_solution_ra %in% c("Both"))

write_csv(org, "../data/mturk_data/training_data_tweets23_problem_solution_final.csv")

